/*******************************************************************************
Paper title: Does early nutrition predict cognitive skills during later childhood?
Evidence from two developing countries
Alan Sanchez, Marta Favara, Margaret Sheridan, Jere Behrman.
Created: 8 Oct 2020
This version: 4 Dec 2023 
Content: Figure B3
*******************************************************************************/
		
clear all

* Machine profile. Only the "Alan" profile has folders configured below.
local user "Alan"

* Root folders of the replication package:
*   outdata - location of the input data sets
*   output  - destination of the saved graphs
if ("`user'" == "Alan") {
	global outdata "C:\Users\alans\Dropbox\_NIHR21_Proposal\NIH Papers\Paper 1_nutrition&EF\paper\WD\Replication files\Data"
	global output  "C:\Users\alans\Dropbox\_NIHR21_Proposal\NIH Papers\Paper 1_nutrition&EF\paper\WD\Replication files\Output"
}

* Global variable lists.
* NOTE(review): none of the globals defined in this section is referenced in the
* visible part of this script; presumably they are shared with other do-files of
* the replication package - confirm before removing.

* Baseline controls and country-specific maternal-language dummies.
global control0  chage_r4 chage_r4_2 female dm_educ2 dm_educ3 dm_educ4 urban_c /*singleparent_r1*/  hhsize_c
global country1  mom_spanish
global country2  mom_oromifa mom_tigrina mom_other
	
* Quintile dummies (quintiles 2 to 5) for three household measures.
global control2q wi_cq2 wi_cq3 wi_cq4 wi_cq5
global control3q educexp_all_r2q2 educexp_all_r2q3 educexp_all_r2q4 educexp_all_r2q5
global control4q nfoodexp_r2q2 nfoodexp_r2q3 nfoodexp_r2q4 nfoodexp_r2q5
	
* One variable per task (note that task5 is defined before task4).
global task1 	perc_holeinone_r1
global task2 	resptimeave_same    /*correctn_opp timeoutn_all*/
global task3 	euclid_del                  /*     timeoutn_all */
global task5    euclid_dot                  /*     timeoutn_all */
global task4 	resptimeave_unpatt  /*count_patt count_unpatt */
global alltask 	hr2 hr3 wkend practices
		
* Sibling-type indicators (sib_y and sib_o are generated further down).
global cluster_r1 sib_y sib_o /*dclustid_r11- dclustid_r120*/

* Same five variables as task1-task5, under the btask names.
global btask1 	perc_holeinone_r1
global btask2 	resptimeave_same
global btask3 	euclid_del
global btask5   euclid_dot
global btask4 	resptimeave_unpatt
	
* Sample restrictions on zhfa_c; sample1 and sample2 are identical here.
global sample1  zhfa_c<=6 & zhfa_c>=-6
global sample2  zhfa_c<=6 & zhfa_c>=-6

* Cluster identifier and child-level variables.
global absorb   clustid_r1
global indlevel pride_index_r4 agency_index_r4 sesteem_index_r4 ppvtz_r4 missing_ppvt

*---------------------------------------------------------------------------------------------------------------------*---------------------------------------------------------------------------------------------------------------------*------------------------------------------------------------------------------------------------------------------*/

* Load the child-level analysis file.
use "$outdata\pe_et_childlevel_230821", clear

* Keep only child-sibling records whose highest task code equals 4
* (author's note: paired siblings who answered all tasks; removes 22 obs).
tempvar top_task
bysort childid sibling: egen `top_task' = max(task)
keep if `top_task' == 4
drop `top_task'

* Start with an empty set of stored estimates.
estimates clear

* 0/1 indicators for the two sibling types (both are 0 when sibling!=1).
gen sib_y = (sibling==1 & sibyounger==1)
gen sib_o = (sibling==1 & sibyounger==0)

* Nutrition measures taken from round 3 when sibling==1 and from round 2 when
* sibling==0; left missing for any other value of sibling.
foreach stem in zhfa stunting {
	gen     `stem'_c = `stem'_r3 if sibling==1
	replace `stem'_c = `stem'_r2 if sibling==0
}

* Interactions with sibling type and with round-4 age.
foreach stem in zhfa stunting {
	gen `stem'xsib_y = `stem'_c*sib_y
	gen `stem'xsib_o = `stem'_c*sib_o
	gen `stem'xage   = `stem'_c*chage_r4
}

* Interactions with the female indicator.
foreach stem in zhfa stunting {
	gen `stem'xfem = `stem'_c*female
}

* Squared round-4 age.
gen chage_r4_2 = chage_r4*chage_r4

* Shorter names for the two Euclidean-distance outcomes.
rename (euclideandave_delay_0 euclideandave_dot_0) (euclid_del euclid_dot)
	
* ---- Labels: stunting and its interactions ----
label variable stunting_c     "Stunted"
label variable stuntingxsib_y "Stunted x younger sib"
label variable stuntingxsib_o "Stunted x older sib"
label variable stuntingxfem   "Stunted x female"

* ---- Labels: height for age and its interactions ----
label variable zhfa_c     "Height for age"
label variable zhfaxsib_y "Height for age x younger sib"
label variable zhfaxsib_o "Height for age x older sib"
label variable zhfaxfem   "Height for age x female"

* ---- Labels: child and household characteristics ----
label variable chage_r4   "Age in months, r4"
label variable chage_r4_2 "Age in months squared, r4"
label variable female     "Child is female"
label variable dm_educ2   "Maternal edu: complete primary"
label variable dm_educ3   "Maternal edu: complete secondary"
label variable dm_educ4   "Maternal edu: complete tertiary"
label variable urban_r1   "Urban area, r1"
label variable hhsize_r2  "Household size, r2"

* ---- Labels: maternal native tongue ----
label variable mom_spanish "Maternal native tongue: spanish"
label variable mom_oromifa "Maternal native tongue: oromifah"
label variable mom_tigrina "Maternal native tongue: tigrina"
label variable mom_other   "Maternal native tongue: other"

* ---- Labels: wealth index quintiles ----
label variable wi_r1q2 "Wealth index quintile 2"
label variable wi_r1q3 "Wealth index quintile 3"
label variable wi_r1q4 "Wealth index quintile 4"
label variable wi_r1q5 "Wealth index quintile 5"

***Index children
* Height-for-age z-scores for country==2 index children (sibling==0):
*   zhfa_5  takes the round-2 score, zhfa_12 takes the round-4 score.
* A score is copied only when it lies inside [-6, 6]; inrange() evaluates to
* false for missing values, so missing scores stay missing.
gen     zhfa_5  = zhfa_r2 if inrange(zhfa_r2, -6, 6) & sibling==0 & country==2
gen     zhfa_12 = zhfa_r4 if inrange(zhfa_r4, -6, 6) & sibling==0 & country==2

***Older siblings
* Older siblings (sibling==1 & sibyounger==0) take their round-3 score.
* Fix: the range test used to join its two bounds with "|", which is true for
* every value (and for missing), so implausible scores were copied as well.
* Both bounds must hold, exactly as for the index children above.
replace zhfa_12 = zhfa_r3 if inrange(zhfa_r3, -6, 6) & sibling==1 & sibyounger==0 & country==2

***PPVT
* Each block below builds a 0/1 flag that marks a system-missing score among
* country==2 observations (the flag itself stays missing for other countries),
* tabulates it, and then overwrites the missing score with the placeholder
* value 10000. The placeholder is not a real score: rows carrying it are
* identified by the corresponding flag.
gen missing_ppvt=.
replace missing_ppvt=0 if country==2 & ppvtz_r4!=.
replace missing_ppvt=1 if country==2 & ppvtz_r4==.
label var missing_ppvt "Missing PPVT"
tab missing_ppvt
replace ppvtz_r4=10000 if ppvtz_r4==.

***Socio emotional outcomes
* Fix: the flags are created with an _r4 suffix but were then referenced by
* abbreviated names (missing_pride, missing_agency, missing_sesteem). That only
* resolves while variable abbreviation is switched on and the prefix is
* unambiguous; the full variable names are used here instead.
gen missing_pride_r4=.
replace missing_pride_r4=0 if country==2 & pride_index_r4!=.
replace missing_pride_r4=1 if country==2 & pride_index_r4==.
label var missing_pride_r4 "Missing pride index"
tab missing_pride_r4
replace pride_index_r4=10000 if pride_index_r4==.

gen missing_agency_r4=.
replace missing_agency_r4=0 if country==2 & agency_index_r4!=.
replace missing_agency_r4=1 if country==2 & agency_index_r4==.
label var missing_agency_r4 "Missing agency index"
tab missing_agency_r4
replace agency_index_r4=10000 if agency_index_r4==.

gen missing_sesteem_r4=.
replace missing_sesteem_r4=0 if country==2 & sesteem_index_r4!=.
replace missing_sesteem_r4=1 if country==2 & sesteem_index_r4==.
label var missing_sesteem_r4 "Missing self-esteem index"
tab missing_sesteem_r4
replace sesteem_index_r4=10000 if sesteem_index_r4==.

* Interact every listed predictor with age and age squared, using chage_r2
* when sibling==0 and chage_r3 when sibling==1 (missing for other values).
foreach v of varlist zhfa_5 zhfa_12 pride_index_r4 agency_index_r4 sesteem_index_r4 ppvtz_r4 missing_ppvt ///
	hsleep_r4 hcare_r4 hchore_r4 htask_r4 hwork_r4 hschool_r4 hstudy_r4 hplay_r4 female ///
	dm_educ2 dm_educ3 dm_educ4 urban_c hhsize_c mom_oromifa mom_tigrina mom_other wi_cq2 wi_cq3 wi_cq4 wi_cq5 {
	gen     `v'xage  = `v'*chage_r2          if sibling==0
	gen     `v'xage2 = `v'*chage_r2*chage_r2 if sibling==0
	replace `v'xage  = `v'*chage_r3          if sibling==1
	replace `v'xage2 = `v'*chage_r3*chage_r3 if sibling==1
}

* One 0/1 dummy for each value 1 to 20 of clustid_r1.
forvalues c = 1/20 {
	gen clust`c'_ = (clustid_r1==`c')
}

*************************************************************************
********** PREDICTION MODEL FOR OLDER SIBLINGS
*************************************************************************
* Restrict to country==2 observations with a non-missing task1.
keep if country==2 & task1!=.

* Predictors kept in levels.
local levels pride_index_r4 agency_index_r4 sesteem_index_r4 ///
	ppvtz_r4 missing_ppvt female ///
	hcare_r4 hchore_r4 htask_r4 ///
	hwork_r4 hschool_r4 hstudy_r4 hplay_r4 ///
	dm_educ2 dm_educ3 dm_educ4 ///
	urban_c hhsize_c mom_oromifa mom_tigrina mom_other ///
	wi_cq2 wi_cq3 wi_cq4 wi_cq5

* Their interactions with age and age squared.
local interactions
foreach v of local levels {
	local interactions `interactions' `v'xage `v'xage2
}

* Cluster dummies 2 to 20 (clust1_ is not kept).
local clusters
forvalues c = 2/20 {
	local clusters `clusters' clust`c'_
}

* Identifiers, round-specific age and height-for-age, and the lists above.
keep country childid sibling sibyounger ///
	chage_r2 chage_r3 chage_r4 chage_r5 ///
	zhfa_r2 zhfa_r3 zhfa_r4 zhfa_r5 ///
	`levels' `interactions' `clusters'

*Own reference
* Each child's own reference height-for-age (zhfa_ref) and age (chage_ref):
* round 4 for index children and younger siblings, round 3 for older siblings.
gen zhfa_ref=.
replace zhfa_ref=zhfa_r4 if sibling==0                 /* reference haz for index children */
replace zhfa_ref=zhfa_r4 if sibling==1 & sibyounger==1 /* reference haz for younger siblings */
replace zhfa_ref=zhfa_r3 if sibling==1 & sibyounger==0 /* reference haz for older siblings */
gen chage_ref=.
replace chage_ref=chage_r4 if sibling==0                 /* reference age for index children */
replace chage_ref=chage_r4 if sibling==1 & sibyounger==1 /* reference age for younger siblings */
replace chage_ref=chage_r3 if sibling==1 & sibyounger==0 /* reference age for older siblings */
gen chage_refx2=chage_ref*chage_ref

*Sibling reference
* Spread the round-4 haz of the index child (zhfa_ind) and of the sibling
* (zhfa_sib) to every row sharing the same childid. junk1 and junk2 are
* non-missing only on the index row and the sibling row respectively.
gen junk1=zhfa_r4 if sibling==0 
gen junk2=zhfa_r4 if sibling==1 
bys childid: egen zhfa_ind=total(junk1)
bys childid: egen zhfa_sib=total(junk2)
order childid sibling zhfa_ind zhfa_sib junk1 junk2
* egen total() returns 0 when every value in the group is missing, so a 0 in
* zhfa_sib is reset to missing, except for three hard-coded child ids.
* NOTE(review): presumably those three ids have a genuine sibling haz of 0 - confirm.
* NOTE(review): zhfa_ind (and chage_ind and chage_sib below) get no such
* correction, so a group without data keeps the value 0 - confirm this is intended.
replace zhfa_sib=. if zhfa_sib==0 & childid!="ET090092" & childid!="ET130071" & childid!="ET150097"
drop junk1 junk2

* Same construction for round-4 age.
gen junk1=chage_r4 if sibling==0
gen junk2=chage_r4 if sibling==1 
bys childid: egen chage_ind=total(junk1)
bys childid: egen chage_sib=total(junk2)
drop junk1 junk2

* Cross-assign: a sibling row receives the index child's haz and an index row
* receives the sibling's haz.
gen zhfa_ref2=.
replace zhfa_ref2=zhfa_ind if sibling==1 
replace zhfa_ref2=zhfa_sib if sibling==0
drop zhfa_ind zhfa_sib

* Same cross-assignment for age, plus its square.
gen chage_ref2=.
replace chage_ref2=chage_ind if sibling==1 
replace chage_ref2=chage_sib if sibling==0
gen chage_ref2x2=chage_ref2*chage_ref2

drop chage_ind chage_sib

* Interact the four reference variables with age and age squared, using
* chage_r2 when sibling==0 and chage_r3 when sibling==1.
foreach ref of varlist zhfa_ref chage_ref zhfa_ref2 chage_ref2 {
	gen     `ref'xage  = `ref'*chage_r2          if sibling==0
	gen     `ref'xage2 = `ref'*chage_r2*chage_r2 if sibling==0
	replace `ref'xage  = `ref'*chage_r3          if sibling==1
	replace `ref'xage2 = `ref'*chage_r3*chage_r3 if sibling==1
}

* Stack the round-specific age and haz columns into one row per child and
* round; ronda holds the round number.
egen id = group(childid sibling)
reshape long chage_r zhfa_r, i(id) j(ronda)

* Rows removed (author's notes):
*   sibling rows of round 2    - empty rows
*   rounds 4 and 5             - round 5 not needed, no round-4 outcome is used
*   index-child rows of round 3 - would model haz r3 on haz r3
* What remains is round 2 for sibling==0 and round 3 for sibling==1.
drop if (sibling==1 & ronda==2) | inlist(ronda, 4, 5) | (sibling==0 & ronda==3)

* sample_pred: 0 when sibyounger==0, then set to 1 for younger-sibling rows
* of round 3 and for all index-child rows of round 2.
gen     sample_pred = 0 if sibyounger==0
replace sample_pred = 1 if (sibyounger==1 & ronda==3) | (sibling==0 & ronda==2)

* Powers of current age.
gen chage_rx2 = chage_r*chage_r
gen chage_rx3 = chage_r*chage_r*chage_r

* Shorten the self-esteem variable names (level and both age interactions).
rename (sesteem_index_r4 sesteem_index_r4xage sesteem_index_r4xage2) ///
       (sesteem_r4       sesteem_r4xage       sesteem_r4xage2)

* ---- Labels: current age ----
label variable chage_r   "Current age"
label variable chage_rx2 "Current age squared"

* ---- Labels: own reference measures ----
label variable chage_ref     "Own reference age"
label variable chage_refx2   "Own reference age squared"
label variable zhfa_ref      "Own reference haz"
label variable zhfa_refxage  "Own reference haz x current age"
label variable zhfa_refxage2 "Own reference haz x current age squared"

* ---- Labels: sibling reference measures ----
label variable chage_ref2     "Sibling reference age"
label variable chage_ref2x2   "Sibling reference age squared"
label variable zhfa_ref2      "Sibling reference haz"
label variable zhfa_ref2xage  "Sibling reference haz x current age"
label variable zhfa_ref2xage2 "Sibling reference haz x current age squared"

******************************
***Lasso Prediction model 
******************************

* Linear lasso for zhfa_r.
* The ten variables inside the parentheses (own and sibling reference age, haz
* and their interactions) are always included; every variable listed after the
* parentheses is a candidate for selection.
* Estimation sample: sample_pred==1 with zhfa_r inside [-6, 6].
* rseed(3215) fixes the random-number seed so that the fit can be reproduced.
* NOTE(review): missing_ppvt appears twice in the candidate list (right after
* the parentheses and again next to ppvtz_r4) - confirm the repetition is harmless.
lasso linear  zhfa_r (chage_ref  chage_refx2  zhfa_ref  zhfa_refxage  zhfa_refxage2 ///
chage_ref2 chage_ref2x2 zhfa_ref2 zhfa_ref2xage zhfa_ref2xage2) ///
missing_ppvt ///
pride_index_r4      agency_index_r4      sesteem_r4  ///
ppvtz_r4            missing_ppvt         female ///
hcare_r4            hchore_r4            htask_r4   ///
hwork_r4            hschool_r4           hstudy_r4       hplay_r4 ///
dm_educ2            dm_educ3             dm_educ4       ///
urban_c             hhsize_c             mom_oromifa      mom_tigrina     mom_other ///     
wi_cq2              wi_cq3               wi_cq4               wi_cq5 ///
pride_index_r4xage  agency_index_r4xage  sesteem_r4xage ///
ppvtz_r4xage        missing_ppvtxage     femalexage ///
hcare_r4xage        hchore_r4xage        htask_r4xage  ///
hwork_r4xage        hschool_r4xage       hstudy_r4xage   hplay_r4xage ///
dm_educ2xage        dm_educ3xage         dm_educ4xage    ///
urban_cxage         hhsize_cxage         mom_oromifaxage  mom_tigrinaxage  mom_otherxage ///
wi_cq2xage          wi_cq3xage           wi_cq4xage           wi_cq5xage ///
pride_index_r4xage2 agency_index_r4xage2 sesteem_r4xage2 ///
ppvtz_r4xage2       missing_ppvtxage2    femalexage2 ///
hcare_r4xage2       hchore_r4xage2       htask_r4xage2 ///
hwork_r4xage2       hschool_r4xage2      hstudy_r4xage2  hplay_r4xage2 ///
dm_educ2xage2       dm_educ3xage2        dm_educ4xage2   ///
urban_cxage2        hhsize_cxage2        mom_oromifaxage2 mom_tigrinaxage2 mom_otherxage2 ///
wi_cq2xage2         wi_cq3xage2          wi_cq4xage2          wi_cq5xage2 ///
clust2_  clust3_  clust4_  clust5_  clust6_  ///
clust7_  clust8_  clust9_  clust10_ clust11_ ///
clust12_ clust13_ clust14_ clust15_ clust16_ ///
clust17_ clust18_ clust19_ clust20_ ///
if sample_pred==1 & zhfa_r<=6 & zhfa_r>=-6, rseed(3215) 

* Post-estimation diagnostics for the lasso fit: cross-validation plot, table
* of knots, and the post-selection coefficients of the selected variables.
cvplot
lassoknots
lassocoef, display(coef, postselection)

* Keep the observed age in chage_real, then set current age to 60 (and its
* square to 3600) on every row.
gen     chage_real = chage_r
replace chage_r    = 60
replace chage_rx2  = 3600

* Rebuild every age interaction with age fixed at 60 (3600 = 60 squared), first
* for the predictors and then for the four reference variables.
foreach v of varlist pride_index_r4 agency_index_r4 sesteem_r4 ppvtz_r4 missing_ppvt ///
	hcare_r4 hchore_r4 htask_r4 hwork_r4 hschool_r4 hstudy_r4 hplay_r4 female ///
	dm_educ2 dm_educ3 dm_educ4 urban_c hhsize_c mom_oromifa mom_tigrina mom_other wi_cq2 wi_cq3 wi_cq4 wi_cq5 ///
	zhfa_ref chage_ref zhfa_ref2 chage_ref2 {
	drop `v'xage `v'xage2
	gen `v'xage  = `v'*60
	gen `v'xage2 = `v'*3600
}

***Predicting
* Post-selection prediction from the lasso fit; rows whose prediction is
* missing or falls outside [-6, 6] are removed.
predict zhfa_5_p, postselection
drop if !inrange(zhfa_5_p, -6, 6)

*************************************************************************
********** FIGURE B3  ***************************************************
*************************************************************************

***For children age 60
* Panel I: observed against predicted haz for rows whose observed age is
* exactly 60, with observed haz inside [-6, 6].
twoway scatter zhfa_r zhfa_5_p if chage_real==60 & inrange(zhfa_r, -6, 6), ///
	xtitle(Predicted value)                                               ///
	ytitle(Observed value round 2)                                        ///
	title(Children observed at age 60 months)
graph save "$output\figB3I.gph", replace

***Older
* Panel II: same comparison for the rows with sample_pred==0.
twoway scatter zhfa_r zhfa_5_p if sample_pred==0 & inrange(zhfa_r, -6, 6), ///
	xtitle(Predicted value)                                               ///
	ytitle(Observed value round 3)                                        ///
	title(Older siblings)
graph save "$output\figB3II.gph", replace
